# Project root. run_gen.py and the data/ directory are resolved relative to
# this path further down; it is empty here and must be filled in by the user.
WORKDIR=''

# Make the project root importable by the Python process started below.
PYTHONPATH="${WORKDIR}"
export PYTHONPATH


# Positional command-line arguments (all fifteen are read, in this order).
TASK="$1"              # task name; selects the run_gen.py invocation below
SUB_TASK="$2"          # sub-task name; "none" drops the sub-task path level
NSAMPLES="$3"          # forwarded as --data_num
MODEL_TAG="$4"         # key used to pick model type / tokenizer / checkpoint
MODEL_DIR="$5"         # root directory under which run outputs are created
LR="$6"                # learning-rate mantissa; forwarded as "${LR}e-5"
BS="$7"                # used for both --train_batch_size and --eval_batch_size
SRC_LEN="$8"           # forwarded as --max_source_length
TRG_LEN="$9"           # forwarded as --max_target_length
PATIENCE="${10}"       # forwarded as --patience
EPOCH="${11}"          # forwarded as --num_train_epochs
WARMUP="${12}"         # forwarded as --warmup_steps
MODEL_LOADDIR="${13}"  # tokenizer/checkpoint location for the *_custom tags
DATA_TYPE="${14}"      # "trees" enables the --parse_as_tree flag
SEED="${15}"           # forwarded as --seed; also part of the output path

# Default gradient-accumulation steps; the codegen model tags raise it to 2.
GRAD_ACC_STEPS=1


# Optional extra flag for run_gen.py: empty by default, "--parse_as_tree" only
# when the caller asked for the "trees" data type.
datatype_flag=""
case "${DATA_TYPE}" in
    trees) datatype_flag="--parse_as_tree" ;;
esac



# Run identifier: the model tag and every hyper-parameter are encoded in one
# directory name, followed by a per-seed sub-directory (note the embedded "/").
FULL_MODEL_TAG="${MODEL_TAG}_${NSAMPLES}data_lr${LR}_bs${BS}_src${SRC_LEN}_trg${TRG_LEN}_pat${PATIENCE}_e${EPOCH}/seed_${SEED}"

# SUB_TASK=none means the task has no sub-task, so that level is left out of
# the output path.
if [[ "${SUB_TASK}" == none ]]; then
  OUTPUT_DIR="${MODEL_DIR}/${TASK}/${FULL_MODEL_TAG}"
else
  OUTPUT_DIR="${MODEL_DIR}/${TASK}/${SUB_TASK}/${FULL_MODEL_TAG}"
fi

CACHE_DIR="${OUTPUT_DIR}/cache_data"   # passed to run_gen.py as --cache_path
RES_DIR="${OUTPUT_DIR}/prediction"     # passed to run_gen.py as --res_dir
LOG="${OUTPUT_DIR}/train.log"          # receives the tee'd training output

# Paths are quoted so that a MODEL_DIR (or any component) containing spaces or
# glob characters creates exactly these three directories instead of being
# word-split; "--" keeps a leading "-" from being parsed as an option.
mkdir -p -- "${OUTPUT_DIR}" "${CACHE_DIR}" "${RES_DIR}"

# Directory name handed to run_gen.py as --summary_dir.
SUMMARY_DIR='tensorboard'
# Result file handed to run_gen.py as --res_fn; lives inside the run's output
# directory and is named after the task and the model tag.
RES_FN=${OUTPUT_DIR}/${TASK}_${MODEL_TAG}.txt


# Translate MODEL_TAG into the three values handed to run_gen.py:
#   MODEL_TYPE -> --model_type
#   TOKENIZER  -> --tokenizer_name
#   MODEL_PATH -> --model_name_or_path
# The *_custom tags and the tokenizer-only ablation take their paths from
# MODEL_LOADDIR. The codegen tags additionally set GRAD_ACC_STEPS to 2.
# A tag that is not listed leaves all of these variables untouched.
case "${MODEL_TAG}" in
  roberta)
    MODEL_TYPE=roberta
    TOKENIZER=roberta-base
    MODEL_PATH=roberta-base
    ;;
  codebert)
    MODEL_TYPE=roberta
    TOKENIZER=roberta-base
    MODEL_PATH=microsoft/codebert-base
    ;;
  bart_base)
    MODEL_TYPE=bart
    TOKENIZER=facebook/bart-base
    MODEL_PATH=facebook/bart-base
    ;;
  codet5_small)
    MODEL_TYPE=codet5
    TOKENIZER=Salesforce/codet5-small
    MODEL_PATH=Salesforce/codet5-small
    ;;
  codet5_base)
    MODEL_TYPE=codet5
    TOKENIZER=Salesforce/codet5-base
    MODEL_PATH=Salesforce/codet5-base
    ;;
  codet5_large)
    MODEL_TYPE=codet5
    TOKENIZER=Salesforce/codet5-large
    MODEL_PATH=Salesforce/codet5-large
    ;;
  codet5_custom)
    MODEL_TYPE=codet5_custom
    TOKENIZER="${MODEL_LOADDIR}"
    MODEL_PATH="${MODEL_LOADDIR}"
    ;;
  codet5_ablation_onlytok)
    # Custom tokenizer combined with the stock CodeT5-base checkpoint.
    MODEL_TYPE=codet5_custom
    TOKENIZER="${MODEL_LOADDIR}"
    MODEL_PATH=Salesforce/codet5-base
    ;;
  plbart)
    MODEL_TYPE=plbart
    TOKENIZER=uclanlp/plbart-base
    MODEL_PATH=uclanlp/plbart-base
    ;;
  graphcodebert)
    MODEL_TYPE=graphcodebert
    TOKENIZER=microsoft/graphcodebert-base
    MODEL_PATH=microsoft/graphcodebert-base
    ;;
  unixcoder)
    MODEL_TYPE=unixcoder
    TOKENIZER=microsoft/unixcoder-base
    MODEL_PATH=microsoft/unixcoder-base
    ;;
  codegen)
    MODEL_TYPE=codegen
    TOKENIZER=Salesforce/codegen-350M-multi
    MODEL_PATH=Salesforce/codegen-350M-multi
    GRAD_ACC_STEPS=2
    ;;
  codegen_custom)
    MODEL_TYPE=codegen
    TOKENIZER="${MODEL_LOADDIR}"
    MODEL_PATH="${MODEL_LOADDIR}"
    GRAD_ACC_STEPS=2
    ;;
esac


#######################################
# Train, evaluate and test with run_gen.py for the task named by TASK, copying
# the combined stdout/stderr of the run into LOG.
# Globals (read):
#   WORKDIR, TASK, SUB_TASK, MODEL_TYPE, NSAMPLES, EPOCH, WARMUP, LR, PATIENCE,
#   TOKENIZER, MODEL_PATH, CACHE_DIR, OUTPUT_DIR, SUMMARY_DIR, RES_DIR, RES_FN,
#   GRAD_ACC_STEPS, BS, SRC_LEN, TRG_LEN, SEED, LOG, datatype_flag
# Arguments:
#   None
# Outputs:
#   The run's output on stdout (also written to LOG); an error message on
#   stderr when TASK is not supported.
# Returns:
#   The exit status of the python process, or 2 for an unsupported TASK.
#######################################
run_task() {
  local -a cmd=(
    python "${WORKDIR}/run_gen.py"
    --do_train --do_eval --do_eval_bleu --do_test
  )

  # Deliberately unquoted: an empty datatype_flag must expand to no argument at
  # all rather than to an empty-string argument.
  # shellcheck disable=SC2206
  cmd+=(${datatype_flag})

  cmd+=(--task "${TASK}")

  # The supported tasks differ only in whether --sub_task is forwarded.
  case "${TASK}" in
    translate | refine | summarize | conala | avatar)
      cmd+=(--sub_task "${SUB_TASK}")
      ;;
    concode | defect | mbpp)
      ;;
    *)
      printf 'error: unsupported TASK: %s\n' "${TASK}" >&2
      return 2
      ;;
  esac

  cmd+=(
    --model_type "${MODEL_TYPE}" --data_num "${NSAMPLES}"
    --num_train_epochs "${EPOCH}" --warmup_steps "${WARMUP}"
    --learning_rate "${LR}e-5" --patience "${PATIENCE}"
    "--tokenizer_name=${TOKENIZER}" "--model_name_or_path=${MODEL_PATH}"
    --data_dir "${WORKDIR}/data"
    --cache_path "${CACHE_DIR}" --output_dir "${OUTPUT_DIR}"
    --summary_dir "${SUMMARY_DIR}"
    --always_save_model --res_dir "${RES_DIR}" --res_fn "${RES_FN}"
    --gradient_accumulation_steps "${GRAD_ACC_STEPS}"
    --train_batch_size "${BS}" --eval_batch_size "${BS}"
    --max_source_length "${SRC_LEN}" --seed "${SEED}"
    --max_target_length "${TRG_LEN}"
  )

  "${cmd[@]}" 2>&1 | tee "${LOG}"
  # A pipeline reports the status of its last stage (tee); return the status
  # of the training process instead so that failures are not masked.
  return "${PIPESTATUS[0]}"
}

# Last statement of the script: its status becomes the script's exit status.
run_task